# loading libraries
library(tidyverse)
library(nycflights13)
# modifying chart size
options(repr.plot.width=5, repr.plot.height=3)
Exercise 1
Find all flights that
- Had an arrival delay of two or more hours
- Flew to Houston (IAH or HOU)
- Were operated by United, American, or Delta (hint: use
airline dataset to get the two-character carrier codes)
- Departed in summer (July, August, and September)
- Arrived more than two hours late, but didn’t leave late
- Departed by at least an hour delay, but made up over 30 minutes in flight
- Departed between midnight and 6am (inclusive)
Answer:
- Had an arrival delay of two or more hours
filter(flights, arr_delay >= 120)
- Flew to Houston (IAH or HOU)
filter(flights, dest == "IAH" | dest == "HOU")
# or filter(flights, dest %in% c("IAH", "HOU"))
- Were operated by United, American, or Delta (hint: use
airline dataset to get the two-character carrier codes)
airlines
filter(flights, carrier %in% c("AA", "DL", "UA"))
- Departed in summer (July, August, and September)
filter(flights, month %in% c(7, 8, 9))
- Arrived more than two hours late, but didn’t leave late
filter(flights, dep_delay <= 0, arr_delay > 120)
- Departed by at least an hour delay, but made up over 30 minutes in flight
filter(flights, dep_delay >= 60, dep_delay - arr_delay > 30)
- Departed between midnight and 6am (inclusive)
filter(flights, dep_time <= 600 | dep_time == 2400)
Exercise 2
Another useful dplyr filtering helper is between(). What does it do? Can you use it to simplify the code needed to answer the previous exercise, part 4?
Answer:
?between
filter(flights, between(month, 7, 9))
Exercise 3
How many flights have a missing dep_time? What other variables are missing? What might these rows represent?
Answer:
8255 flights have missing dep_time.
arr_time is also missing for these rows. Seems to be canceled flights.
filter(flights, is.na(dep_time))
Exercise 4
- How could you use
arrange() to sort all missing values to the start? (Hint: use is.na() and apply your answer to the df dataframe defined below):
(df <- tibble(x = c(5, 6, 2, NA)))
- Apply this answer to
flights dataset to list the canceled flights first (assuming NA for dep_delay means the flight was canceled) and then show other flights starting with the ones with large dep_delay in a descending order.
Answer:
# missing value first
arrange(df, !is.na(x))
# missing value first, then descending
arrange(df, !is.na(x), desc(x))
arrange(flights, !is.na(dep_delay), desc(dep_delay))
We could’ve also used desc(is.na(dep_time)) instead of !is.na(dep_delay), desc(dep_delay).
Exercise 5
Brainstorm as many ways as possible to select dep_time, dep_delay, arr_time, and arr_delay from flights.
Answer:
select(flights, dep_time, dep_delay, arr_time, arr_delay)
All the following will result the same:
select(flights, "dep_time", "dep_delay", "arr_time", "arr_delay")
select(flights, 4, 6, 7, 9)
select(flights, 4:9, -c(5,8))
select(flights, starts_with("dep_"), starts_with("arr_"))
variables <- c("dep_time", "dep_delay", "arr_time", "arr_delay")
select(flights, variables)
Exercise 6
What happens if you include the name of a variable multiple times in a select() call?
Answer:
It will show up only once:
select(flights, dep_time, dep_delay, dep_time)
Exercise 7
Does the result of running the following code surprise you? How do the select helpers deal with case by default? Can you change that default?
select(flights, contains("TIME"))
Answer:
select(flights, contains("TIME", ignore.case = FALSE))
ignore.case = TRUE is the default, so we don’t even need to specify it:
select(flights, contains("TIME", ignore.case = TRUE))
LS0tCnRpdGxlOiAiU29sdXRpb24iCnN1YnRpdGxlOiAiRGF0YSBUcmFuc2Zvcm1hdGlvbiAtIFBhcnQgMSIKb3V0cHV0OiBodG1sX25vdGVib29rCi0tLQoKYGBge3J9CiMgbG9hZGluZyBsaWJyYXJpZXMKbGlicmFyeSh0aWR5dmVyc2UpCmxpYnJhcnkobnljZmxpZ2h0czEzKQoKIyBtb2RpZnlpbmcgY2hhcnQgc2l6ZQpvcHRpb25zKHJlcHIucGxvdC53aWR0aD01LCByZXByLnBsb3QuaGVpZ2h0PTMpCmBgYAoKIyMjIEV4ZXJjaXNlIDEKRmluZCBhbGwgZmxpZ2h0cyB0aGF0CgoxLiBIYWQgYW4gYXJyaXZhbCBkZWxheSBvZiB0d28gb3IgbW9yZSBob3VycwoyLiBGbGV3IHRvIEhvdXN0b24gKElBSCBvciBIT1UpCjMuIFdlcmUgb3BlcmF0ZWQgYnkgVW5pdGVkLCBBbWVyaWNhbiwgb3IgRGVsdGEgKGhpbnQ6IHVzZSBgYWlybGluZWAgZGF0YXNldCB0byBnZXQgdGhlIHR3by1jaGFyYWN0ZXIgY2FycmllciBjb2RlcykKNC4gRGVwYXJ0ZWQgaW4gc3VtbWVyIChKdWx5LCBBdWd1c3QsIGFuZCBTZXB0ZW1iZXIpCjUuIEFycml2ZWQgbW9yZSB0aGFuIHR3byBob3VycyBsYXRlLCBidXQgZGlkbid0IGxlYXZlIGxhdGUKNi4gRGVwYXJ0ZWQgYnkgYXQgbGVhc3QgYW4gaG91ciBkZWxheSwgYnV0IG1hZGUgdXAgb3ZlciAzMCBtaW51dGVzIGluIGZsaWdodAo3LiBEZXBhcnRlZCBiZXR3ZWVuIG1pZG5pZ2h0IGFuZCA2YW0gKGluY2x1c2l2ZSkKCioqQW5zd2VyOioqCgoxLiBIYWQgYW4gYXJyaXZhbCBkZWxheSBvZiB0d28gb3IgbW9yZSBob3VycwpgYGB7cn0KZmlsdGVyKGZsaWdodHMsIGFycl9kZWxheSA+PSAxMjApCmBgYAoyLiBGbGV3IHRvIEhvdXN0b24gKElBSCBvciBIT1UpCmBgYHtyfQpmaWx0ZXIoZmxpZ2h0cywgZGVzdCA9PSAiSUFIIiB8IGRlc3QgPT0gIkhPVSIpCiMgb3IgZmlsdGVyKGZsaWdodHMsIGRlc3QgJWluJSBjKCJJQUgiLCAiSE9VIikpCmBgYAoKMy4gV2VyZSBvcGVyYXRlZCBieSBVbml0ZWQsIEFtZXJpY2FuLCBvciBEZWx0YSAoaGludDogdXNlIGBhaXJsaW5lYCBkYXRhc2V0IHRvIGdldCB0aGUgdHdvLWNoYXJhY3RlciBjYXJyaWVyIGNvZGVzKQpgYGB7cn0KYWlybGluZXMKYGBgCgpgYGB7cn0KZmlsdGVyKGZsaWdodHMsIGNhcnJpZXIgJWluJSBjKCJBQSIsICJETCIsICJVQSIpKQpgYGAKCgo0LiBEZXBhcnRlZCBpbiBzdW1tZXIgKEp1bHksIEF1Z3VzdCwgYW5kIFNlcHRlbWJlcikKYGBge3J9CmZpbHRlcihmbGlnaHRzLCBtb250aCAlaW4lICBjKDcsIDgsIDkpKQpgYGAKCjUuIEFycml2ZWQgbW9yZSB0aGFuIHR3byBob3VycyBsYXRlLCBidXQgZGlkbid0IGxlYXZlIGxhdGUKYGBge3J9CmZpbHRlcihmbGlnaHRzLCBkZXBfZGVsYXkgPD0gMCwgYXJyX2RlbGF5ID4gMTIwKQpgYGAKCjYuIERlcGFydGVkIGJ5IGF0IGxlYXN0IGFuIGhvdXIgZGVsYXksIGJ1dCBtYWRlIHVwIG92ZXIgMzAgbWludXRlcyBpbiBmbGlnaHQKYGBge3J9CmZpbHRlcihmbGlnaHRzLCBkZXBfZGVsYXkgPj0gNjAsIGRlcF9kZWxheSAtIGFycl9kZWxheSA+IDMwKQpgYGAKCjcuIERlcGFydGVkIGJldHdlZW4gbWlkbmlnaHQgYW5kIDZhbSAoaW5jbHVzaXZlKQpgYGB7cn0KZmlsdGVyKGZsaWdodHMsIGRlcF90aW1lIDw9IDYwMCB8IGRlcF90aW1lID09IDI0MDApCmBgYAoKLS0tCgojIyMgRXhlcmNpc2UgMgpBbm90aGVyIHVzZWZ1bCBkcGx5ciBmaWx0ZXJpbmcgaGVscGVyIGlzIGBiZXR3ZWVuKClgLiBXaGF0IGRvZXMgaXQgZG8/IENhbiB5b3UgdXNlIGl0IHRvIHNpbXBsaWZ5IHRoZSBjb2RlIG5lZWRlZCB0byBhbnN3ZXIgdGhlIHByZXZpb3VzIGV4ZXJjaXNlLCBwYXJ0IDQ/CgoqKkFuc3dlcjoqKgoKYGBge3IgZXZhbD1GQUxTRX0KP2JldHdlZW4KYGBgCgpgYGB7cn0KZmlsdGVyKGZsaWdodHMsIGJldHdlZW4obW9udGgsIDcsIDkpKQpgYGAKCi0tLQoKIyMjIEV4ZXJjaXNlIDMKSG93IG1hbnkgZmxpZ2h0cyBoYXZlIGEgbWlzc2luZyBgZGVwX3RpbWVgPyBXaGF0IG90aGVyIHZhcmlhYmxlcyBhcmUgbWlzc2luZz8gV2hhdCBtaWdodCB0aGVzZSByb3dzIHJlcHJlc2VudD8KCioqQW5zd2VyOioqCgpgciBmaWx0ZXIoZmxpZ2h0cywgaXMubmEoZGVwX3RpbWUpKSAlPiUgbnJvd2AgZmxpZ2h0cyBoYXZlIG1pc3NpbmcgYGRlcF90aW1lYC4KCmBhcnJfdGltZWAgaXMgYWxzbyBtaXNzaW5nIGZvciB0aGVzZSByb3dzLiBTZWVtcyB0byBiZSBjYW5jZWxlZCBmbGlnaHRzLgoKYGBge3J9CmZpbHRlcihmbGlnaHRzLCBpcy5uYShkZXBfdGltZSkpCmBgYAoKCi0tLQoKIyMjIEV4ZXJjaXNlIDQKMSkgSG93IGNvdWxkIHlvdSB1c2UgYGFycmFuZ2UoKWAgdG8gc29ydCBhbGwgbWlzc2luZyB2YWx1ZXMgdG8gdGhlIHN0YXJ0PyAoSGludDogdXNlIGBpcy5uYSgpYCBhbmQgYXBwbHkgeW91ciBhbnN3ZXIgdG8gdGhlIGBkZmAgZGF0YWZyYW1lIGRlZmluZWQgYmVsb3cpOgoKYGBge3J9CihkZiA8LSB0aWJibGUoeCA9IGMoNSwgNiwgMiwgTkEpKSkKYGBgCgoyKSBBcHBseSB0aGlzIGFuc3dlciB0byBgZmxpZ2h0c2AgZGF0YXNldCB0byBsaXN0IHRoZSBjYW5jZWxlZCBmbGlnaHRzIGZpcnN0IChhc3N1bWluZyBgTkFgIGZvciBgZGVwX2RlbGF5YCBtZWFucyB0aGUgZmxpZ2h0IHdhcyBjYW5jZWxlZCkgYW5kIHRoZW4gc2hvdyBvdGhlciBmbGlnaHRzIHN0YXJ0aW5nIHdpdGggdGhlIG9uZXMgd2l0aCBsYXJnZSBgZGVwX2RlbGF5YCBpbiBhIGRlc2NlbmRpbmcgb3JkZXIuCgoqKkFuc3dlcjoqKgoKMSkKCmBgYHtyfQojIG1pc3NpbmcgdmFsdWUgZmlyc3QKYXJyYW5nZShkZiwgIWlzLm5hKHgpKQpgYGAKCmBgYHtyfQojIG1pc3NpbmcgdmFsdWUgZmlyc3QsIHRoZW4gZGVzY2VuZGluZwphcnJhbmdlKGRmLCAhaXMubmEoeCksIGRlc2MoeCkpCmBgYAoKMikKCmBgYHtyfQphcnJhbmdlKGZsaWdodHMsICFpcy5uYShkZXBfZGVsYXkpLCBkZXNjKGRlcF9kZWxheSkpCmBgYAoKV2UgY291bGQndmUgYWxzbyB1c2VkIGBkZXNjKGlzLm5hKGRlcF90aW1lKSlgIGluc3RlYWQgb2YgYCFpcy5uYShkZXBfZGVsYXkpLCBkZXNjKGRlcF9kZWxheSlgLgoKLS0tCgojIyMgRXhlcmNpc2UgNQpCcmFpbnN0b3JtIGFzIG1hbnkgd2F5cyBhcyBwb3NzaWJsZSB0byBzZWxlY3QgYGRlcF90aW1lYCwgYGRlcF9kZWxheWAsIGBhcnJfdGltZWAsIGFuZCBgYXJyX2RlbGF5YCBmcm9tIGBmbGlnaHRzYC4KCioqQW5zd2VyOioqCgpgYGB7cn0Kc2VsZWN0KGZsaWdodHMsIGRlcF90aW1lLCBkZXBfZGVsYXksIGFycl90aW1lLCBhcnJfZGVsYXkpCmBgYAoKQWxsIHRoZSBmb2xsb3dpbmcgd2lsbCByZXN1bHQgdGhlIHNhbWU6CgpgYGB7ciBldmFsPUZBTFNFfQpzZWxlY3QoZmxpZ2h0cywgImRlcF90aW1lIiwgImRlcF9kZWxheSIsICJhcnJfdGltZSIsICJhcnJfZGVsYXkiKQoKc2VsZWN0KGZsaWdodHMsIDQsIDYsIDcsIDkpCgpzZWxlY3QoZmxpZ2h0cywgNDo5LCAtYyg1LDgpKQoKc2VsZWN0KGZsaWdodHMsIHN0YXJ0c193aXRoKCJkZXBfIiksIHN0YXJ0c193aXRoKCJhcnJfIikpCgp2YXJpYWJsZXMgPC0gYygiZGVwX3RpbWUiLCAiZGVwX2RlbGF5IiwgImFycl90aW1lIiwgImFycl9kZWxheSIpCnNlbGVjdChmbGlnaHRzLCB2YXJpYWJsZXMpCmBgYAoKCi0tLQoKIyMjIEV4ZXJjaXNlIDYKV2hhdCBoYXBwZW5zIGlmIHlvdSBpbmNsdWRlIHRoZSBuYW1lIG9mIGEgdmFyaWFibGUgbXVsdGlwbGUgdGltZXMgaW4gYSBgc2VsZWN0KClgIGNhbGw/CgoqKkFuc3dlcjoqKgoKSXQgd2lsbCBzaG93IHVwIG9ubHkgb25jZToKCmBgYHtyfQpzZWxlY3QoZmxpZ2h0cywgZGVwX3RpbWUsIGRlcF9kZWxheSwgZGVwX3RpbWUpCmBgYAoKLS0tCgojIyMgRXhlcmNpc2UgNwpEb2VzIHRoZSByZXN1bHQgb2YgcnVubmluZyB0aGUgZm9sbG93aW5nIGNvZGUgc3VycHJpc2UgeW91PyBIb3cgZG8gdGhlIGBzZWxlY3RgIGhlbHBlcnMgZGVhbCB3aXRoIGNhc2UgYnkgZGVmYXVsdD8gQ2FuIHlvdSBjaGFuZ2UgdGhhdCBkZWZhdWx0PwoKYGBge3J9CnNlbGVjdChmbGlnaHRzLCBjb250YWlucygiVElNRSIpKQpgYGAKCioqQW5zd2VyOioqCgpgYGB7cn0Kc2VsZWN0KGZsaWdodHMsIGNvbnRhaW5zKCJUSU1FIiwgaWdub3JlLmNhc2UgPSBGQUxTRSkpCmBgYAoKaWdub3JlLmNhc2UgPSBUUlVFIGlzIHRoZSBkZWZhdWx0LCBzbyB3ZSBkb24ndCBldmVuIG5lZWQgdG8gc3BlY2lmeSBpdDoKCmBgYHtyfQpzZWxlY3QoZmxpZ2h0cywgY29udGFpbnMoIlRJTUUiLCBpZ25vcmUuY2FzZSA9IFRSVUUpKQpgYGAK